{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# pandas_groupby常用8个用法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.153545Z",
     "start_time": "2021-03-31T14:11:37.408024Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>6.3</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.9</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>6.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>5.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
       "0                  5.1               3.5                1.4               0.2   \n",
       "1                  4.9               3.0                1.4               0.2   \n",
       "2                  4.7               3.2                1.3               0.2   \n",
       "3                  4.6               3.1                1.5               0.2   \n",
       "4                  5.0               3.6                1.4               0.2   \n",
       "..                 ...               ...                ...               ...   \n",
       "145                6.7               3.0                5.2               2.3   \n",
       "146                6.3               2.5                5.0               1.9   \n",
       "147                6.5               3.0                5.2               2.0   \n",
       "148                6.2               3.4                5.4               2.3   \n",
       "149                5.9               3.0                5.1               1.8   \n",
       "\n",
       "     target  \n",
       "0         0  \n",
       "1         0  \n",
       "2         0  \n",
       "3         0  \n",
       "4         0  \n",
       "..      ...  \n",
       "145       2  \n",
       "146       2  \n",
       "147       2  \n",
       "148       2  \n",
       "149       2  \n",
       "\n",
       "[150 rows x 5 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn import datasets\n",
    "\n",
    "# 加载数据集和目标\n",
    "data, target = datasets.load_iris(return_X_y=True, as_frame=True)\n",
    "# 合并数据集和目标\n",
    "iris = pd.concat([data, target], axis=1, sort=False)\n",
    "iris"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.165661Z",
     "start_time": "2021-03-31T14:11:38.156054Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>108</th>\n",
       "      <td>6.7</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>6.0</td>\n",
       "      <td>2.7</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>5.7</td>\n",
       "      <td>2.9</td>\n",
       "      <td>4.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>6.8</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.8</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
       "108                6.7               2.5                5.8               1.8   \n",
       "35                 5.0               3.2                1.2               0.2   \n",
       "83                 6.0               2.7                5.1               1.6   \n",
       "96                 5.7               2.9                4.2               1.3   \n",
       "76                 6.8               2.8                4.8               1.4   \n",
       "\n",
       "     target  \n",
       "108       2  \n",
       "35        0  \n",
       "83        1  \n",
       "96        1  \n",
       "76        1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris.sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.171228Z",
     "start_time": "2021-03-31T14:11:38.168023Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.groupby.generic.DataFrameGroupBy"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb = iris.groupby('target')\n",
    "type(iris_gb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 创建频率表\n",
    "假如我想知道每个species类中的数量有多少，那么直接使用groupby的size函数即可"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.177663Z",
     "start_time": "2021-03-31T14:11:38.173301Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "0    50\n",
       "1    50\n",
       "2    50\n",
       "dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb.size()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 计算常用的描述统计量\n",
    "\n",
    "min、max()、medianhe、std等"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.188037Z",
     "start_time": "2021-03-31T14:11:38.179404Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.006</td>\n",
       "      <td>3.428</td>\n",
       "      <td>1.462</td>\n",
       "      <td>0.246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.936</td>\n",
       "      <td>2.770</td>\n",
       "      <td>4.260</td>\n",
       "      <td>1.326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6.588</td>\n",
       "      <td>2.974</td>\n",
       "      <td>5.552</td>\n",
       "      <td>2.026</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "target                                                           \n",
       "0                   5.006             3.428              1.462   \n",
       "1                   5.936             2.770              4.260   \n",
       "2                   6.588             2.974              5.552   \n",
       "\n",
       "        petal width (cm)  \n",
       "target                    \n",
       "0                  0.246  \n",
       "1                  1.326  \n",
       "2                  2.026  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算均值\n",
    "iris_gb.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.193864Z",
     "start_time": "2021-03-31T14:11:38.189700Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "0    5.006\n",
       "1    5.936\n",
       "2    6.588\n",
       "Name: sepal length (cm), dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 单列\n",
    "iris_gb['sepal length (cm)'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.202716Z",
     "start_time": "2021-03-31T14:11:38.195432Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.006</td>\n",
       "      <td>3.428</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.936</td>\n",
       "      <td>2.770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6.588</td>\n",
       "      <td>2.974</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal length (cm)  sepal width (cm)\n",
       "target                                     \n",
       "0                   5.006             3.428\n",
       "1                   5.936             2.770\n",
       "2                   6.588             2.974"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 双列\n",
    "iris_gb[['sepal length (cm)', 'sepal width (cm)']].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查找最大值（最小值）索引\n",
    "查找每个组的最大值或最小值的索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.215529Z",
     "start_time": "2021-03-31T14:11:38.205731Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "      <td>24</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>85</td>\n",
       "      <td>83</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>131</td>\n",
       "      <td>117</td>\n",
       "      <td>118</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "target                                                           \n",
       "0                      14                15                 24   \n",
       "1                      50                85                 83   \n",
       "2                     131               117                118   \n",
       "\n",
       "        petal width (cm)  \n",
       "target                    \n",
       "0                     43  \n",
       "1                     70  \n",
       "2                    100  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb.idxmax()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "想查找每组sepal_length最大值对应的整条记录时，就可以这样用。注意，这里是整条记录，相当于按sepal_length最大值这个条件进行了筛选。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.226870Z",
     "start_time": "2021-03-31T14:11:38.217545Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>5.8</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>7.0</td>\n",
       "      <td>3.2</td>\n",
       "      <td>4.7</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>131</th>\n",
       "      <td>7.9</td>\n",
       "      <td>3.8</td>\n",
       "      <td>6.4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
       "14                 5.8               4.0                1.2               0.2   \n",
       "50                 7.0               3.2                4.7               1.4   \n",
       "131                7.9               3.8                6.4               2.0   \n",
       "\n",
       "     target  \n",
       "14        0  \n",
       "50        1  \n",
       "131       2  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sepal_largest = iris.loc[iris_gb['sepal length (cm)'].idxmax()]\n",
    "sepal_largest"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Groupby之后重置索引\n",
    "很多时候，在groupby处理后还要进行其他操作。也就是说，我们想重置分组索引以使其成为正常的行和列。\n",
    "第一种方法可能大家常用，就是通过reset_index()让乱序索引重置。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.238001Z",
     "start_time": "2021-03-31T14:11:38.228479Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target</th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>4.4</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>7.9</td>\n",
       "      <td>3.8</td>\n",
       "      <td>6.9</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   target  sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "0       0                5.8               4.4                1.9   \n",
       "1       1                7.0               3.4                5.1   \n",
       "2       2                7.9               3.8                6.9   \n",
       "\n",
       "   petal width (cm)  \n",
       "0               0.6  \n",
       "1               1.8  \n",
       "2               2.5  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb.max().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "但其实，还有一个看上去更加友好的用法。可以在groupby的时候就设置as_index参数，也可以达到同样效果。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.249499Z",
     "start_time": "2021-03-31T14:11:38.239616Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target</th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>4.4</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>7.9</td>\n",
       "      <td>3.8</td>\n",
       "      <td>6.9</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   target  sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "0       0                5.8               4.4                1.9   \n",
       "1       1                7.0               3.4                5.1   \n",
       "2       2                7.9               3.8                6.9   \n",
       "\n",
       "   petal width (cm)  \n",
       "0               0.6  \n",
       "1               1.8  \n",
       "2               2.5  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris.groupby('target', as_index=False).max()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多种统计量汇总\n",
    "上面都是单个统计量的操作，那如果我想同时操作好几个呢？\n",
    "\n",
    "groupby还有一个超级棒的用法就是和聚合函数agg连起来使用。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.266034Z",
     "start_time": "2021-03-31T14:11:38.251258Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">sepal length (cm)</th>\n",
       "      <th colspan=\"2\" halign=\"left\">sepal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>min</th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.3</td>\n",
       "      <td>5.006</td>\n",
       "      <td>2.3</td>\n",
       "      <td>3.428</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>5.936</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.9</td>\n",
       "      <td>6.588</td>\n",
       "      <td>2.2</td>\n",
       "      <td>2.974</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       sepal length (cm)        sepal width (cm)       \n",
       "                     min   mean              min   mean\n",
       "target                                                 \n",
       "0                    4.3  5.006              2.3  3.428\n",
       "1                    4.9  5.936              2.0  2.770\n",
       "2                    4.9  6.588              2.2  2.974"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb[['sepal length (cm)', 'sepal width (cm)']].agg([\"min\", \"mean\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特定列的聚合\n",
    "我们也看到了，上面是的多个操作对于每个列都是一样的。实际使用过程中，我们可能对于每个列的需求都是不一样的。\n",
    "\n",
    "所以在这种情况下，我们可以通过为不同的列单独设置不同的统计量。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.280775Z",
     "start_time": "2021-03-31T14:11:38.267750Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">sepal length (cm)</th>\n",
       "      <th colspan=\"2\" halign=\"left\">sepal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.3</td>\n",
       "      <td>5.8</td>\n",
       "      <td>3.428</td>\n",
       "      <td>0.379064</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.770</td>\n",
       "      <td>0.313798</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.9</td>\n",
       "      <td>7.9</td>\n",
       "      <td>2.974</td>\n",
       "      <td>0.322497</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       sepal length (cm)      sepal width (cm)          \n",
       "                     min  max             mean       std\n",
       "target                                                  \n",
       "0                    4.3  5.8            3.428  0.379064\n",
       "1                    4.9  7.0            2.770  0.313798\n",
       "2                    4.9  7.9            2.974  0.322497"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb.agg({\"sepal length (cm)\": [\"min\", \"max\"], \"sepal width (cm)\": [\"mean\", \"std\"]})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NamedAgg命名统计量\n",
    "现在我又有新的想法了。上面的多级索引看起来有点不太友好，我想把每个列下面的统计量和列名分别合并起来。可以使用NamedAgg来完成列的命名。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.297129Z",
     "start_time": "2021-03-31T14:11:38.282568Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_min</th>\n",
       "      <th>sepal_max</th>\n",
       "      <th>petal_mean</th>\n",
       "      <th>petal_std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.3</td>\n",
       "      <td>5.8</td>\n",
       "      <td>1.462</td>\n",
       "      <td>0.173664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.260</td>\n",
       "      <td>0.469911</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.9</td>\n",
       "      <td>7.9</td>\n",
       "      <td>5.552</td>\n",
       "      <td>0.551895</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal_min  sepal_max  petal_mean  petal_std\n",
       "target                                             \n",
       "0             4.3        5.8       1.462   0.173664\n",
       "1             4.9        7.0       4.260   0.469911\n",
       "2             4.9        7.9       5.552   0.551895"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb.agg(\n",
    "     sepal_min=pd.NamedAgg(column=\"sepal length (cm)\", aggfunc=\"min\"),\n",
    "     sepal_max=pd.NamedAgg(column=\"sepal length (cm)\", aggfunc=\"max\"),\n",
    "     petal_mean=pd.NamedAgg(column=\"petal length (cm)\", aggfunc=\"mean\"),\n",
    "     petal_std=pd.NamedAgg(column=\"petal length (cm)\", aggfunc=\"std\")\n",
    " )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "因为NamedAgg是一个元组，所以我们也可以直接赋值元组给新的命名，效果一样，但看上去更简洁。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.312792Z",
     "start_time": "2021-03-31T14:11:38.298701Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_min</th>\n",
       "      <th>sepal_max</th>\n",
       "      <th>petal_mean</th>\n",
       "      <th>petal_std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.3</td>\n",
       "      <td>5.8</td>\n",
       "      <td>1.462</td>\n",
       "      <td>0.173664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.260</td>\n",
       "      <td>0.469911</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.9</td>\n",
       "      <td>7.9</td>\n",
       "      <td>5.552</td>\n",
       "      <td>0.551895</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal_min  sepal_max  petal_mean  petal_std\n",
       "target                                             \n",
       "0             4.3        5.8       1.462   0.173664\n",
       "1             4.9        7.0       4.260   0.469911\n",
       "2             4.9        7.9       5.552   0.551895"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb.agg(\n",
    "    sepal_min=(\"sepal length (cm)\", \"min\"),\n",
    "    sepal_max=(\"sepal length (cm)\", \"max\"),\n",
    "    petal_mean=(\"petal length (cm)\", \"mean\"),\n",
    "    petal_std=(\"petal length (cm)\", \"std\")\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使用自定义函数\n",
    "上面agg聚合函数中我们都是通过添加一个统计量名称来完成操作的，除此之外我们也可直接给一个功能对象。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.333356Z",
     "start_time": "2021-03-31T14:11:38.315386Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.006</td>\n",
       "      <td>3.428</td>\n",
       "      <td>1.462</td>\n",
       "      <td>0.246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.936</td>\n",
       "      <td>2.770</td>\n",
       "      <td>4.260</td>\n",
       "      <td>1.326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6.588</td>\n",
       "      <td>2.974</td>\n",
       "      <td>5.552</td>\n",
       "      <td>2.026</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "target                                                           \n",
       "0                   5.006             3.428              1.462   \n",
       "1                   5.936             2.770              4.260   \n",
       "2                   6.588             2.974              5.552   \n",
       "\n",
       "        petal width (cm)  \n",
       "target                    \n",
       "0                  0.246  \n",
       "1                  1.326  \n",
       "2                  2.026  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_gb.agg(pd.Series.mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.355760Z",
     "start_time": "2021-03-31T14:11:38.335263Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">sepal length (cm)</th>\n",
       "      <th colspan=\"2\" halign=\"left\">sepal width (cm)</th>\n",
       "      <th colspan=\"2\" halign=\"left\">petal length (cm)</th>\n",
       "      <th colspan=\"2\" halign=\"left\">petal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>min</th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.3</td>\n",
       "      <td>5.006</td>\n",
       "      <td>2.3</td>\n",
       "      <td>3.428</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.462</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>5.936</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.770</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.260</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.9</td>\n",
       "      <td>6.588</td>\n",
       "      <td>2.2</td>\n",
       "      <td>2.974</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.552</td>\n",
       "      <td>1.4</td>\n",
       "      <td>2.026</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       sepal length (cm)        sepal width (cm)        petal length (cm)  \\\n",
       "                     min   mean              min   mean               min   \n",
       "target                                                                      \n",
       "0                    4.3  5.006              2.3  3.428               1.0   \n",
       "1                    4.9  5.936              2.0  2.770               3.0   \n",
       "2                    4.9  6.588              2.2  2.974               4.5   \n",
       "\n",
       "              petal width (cm)         \n",
       "         mean              min   mean  \n",
       "target                                 \n",
       "0       1.462              0.1  0.246  \n",
       "1       4.260              1.0  1.326  \n",
       "2       5.552              1.4  2.026  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 不仅如此，名称和功能对象也可一起使用。\n",
    "iris_gb.agg([\"min\", pd.Series.mean])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.397821Z",
     "start_time": "2021-03-31T14:11:38.380399Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10.012</td>\n",
       "      <td>6.856</td>\n",
       "      <td>2.924</td>\n",
       "      <td>0.492</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11.872</td>\n",
       "      <td>5.540</td>\n",
       "      <td>8.520</td>\n",
       "      <td>2.652</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13.176</td>\n",
       "      <td>5.948</td>\n",
       "      <td>11.104</td>\n",
       "      <td>4.052</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "target                                                           \n",
       "0                  10.012             6.856              2.924   \n",
       "1                  11.872             5.540              8.520   \n",
       "2                  13.176             5.948             11.104   \n",
       "\n",
       "        petal width (cm)  \n",
       "target                    \n",
       "0                  0.492  \n",
       "1                  2.652  \n",
       "2                  4.052  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们还可以自定义函数，也都是可以的。\n",
    "def double_length(x):\n",
    "    return 2*x.mean()\n",
    "\n",
    "iris_gb.agg(double_length)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-03-31T14:11:38.453214Z",
     "start_time": "2021-03-31T14:11:38.436884Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.006</td>\n",
       "      <td>3.428</td>\n",
       "      <td>1.462</td>\n",
       "      <td>0.246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.936</td>\n",
       "      <td>2.770</td>\n",
       "      <td>4.260</td>\n",
       "      <td>1.326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6.588</td>\n",
       "      <td>2.974</td>\n",
       "      <td>5.552</td>\n",
       "      <td>2.026</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "target                                                           \n",
       "0                   5.006             3.428              1.462   \n",
       "1                   5.936             2.770              4.260   \n",
       "2                   6.588             2.974              5.552   \n",
       "\n",
       "        petal width (cm)  \n",
       "target                    \n",
       "0                  0.246  \n",
       "1                  1.326  \n",
       "2                  2.026  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 如果想更简洁，也可以使用lambda函数。总之，用法非常灵活，可以自由组合搭配。\n",
    "iris_gb.agg(lambda x: x.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  },
  "nbTranslate": {
   "displayLangs": [
    "*"
   ],
   "hotkey": "alt-t",
   "langInMainMenu": true,
   "sourceLang": "en",
   "targetLang": "fr",
   "useGoogleTranslate": true
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "349px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
